# set working directory to location of the R file
#setwd("replication/scripts")

library(readr); library(dplyr);
library(AER); library(stargazer)
library(plm)
library(interflex)

FirmPanel <- read_csv("../Data/FirmPanel_10102018.csv")
only_treat <- subset(FirmPanel, is.na(agency)==F & time != "2017_2" & time != "2014_4" & 
                       time != "2014_3" & time != "2014_2" & time != "2014_1")

only_treat$di2 <- log(only_treat$rev_adj+.5) - only_treat$lag_rev 

only_treat$di_lag <- only_treat$lag_rev - only_treat$lag_rev2  

p_only <- only_treat %>%
  arrange(employer.id, time) %>%
  group_by(employer.id) %>%
  mutate(lag_rev1 = dplyr::lag(log_rev, order_by = time),
         lag_rev2 = dplyr::lag(log_rev, 2, order_by = time),
         lag_rev3 = dplyr::lag(log_rev, 3, order_by = time),
         lag_rev4 = dplyr::lag(log_rev, 4, order_by = time),
         lag_rev5 = dplyr::lag(log_rev, 5, order_by = time),
         di_lag2 = lag_rev2-lag_rev3)


p_only <- pdata.frame(p_only, index = c("employer.id", "time"))
p_only$treated_within <- p_only$treated*p_only$treated_period


mod1 <- plm(di2 ~ treated + treated_period + treated_within, #+ 
             #di_lag + log(n_contracts) + log(active_lobbyists) , 
           model = "pooling", effect = "individual",
           data = p_only) 

coeftest(mod1, vcovBK(mod1, cluster = "time"))

se1 <- sqrt(diag(vcovBK(mod1, cluster = "time")))

mod2 <- plm(di2 ~ treated + treated_period + treated_within+ 
           #di_lag 
           + log(n_contracts) + log(active_lobbyists) , 
           model = "pooling", effect = "individual",
           data = p_only) 

se2 <- sqrt(diag(vcovBK(mod2, cluster = "time")))

coeftest(mod2, vcovBK(mod2, cluster = "time"))

mod3 <- plm(di2 ~ treated + treated_period + treated_within + 
             di_lag*time + 
             log(n_contracts)*time + log(active_lobbyists)*time , 
             model = "pooling", effect = "twoway",
           data = p_only) 
se3 <- sqrt(diag(vcovBK(mod3, cluster = "time")))

coeftest(mod3, vcovBK(mod3, cluster = "time"))


mod4 <- plm(di2 ~ treated_within + 
             di_lag*time + 
             log(n_contracts)*time + log(active_lobbyists)*time , 
           model = "within", effect = "twoway",
           data = p_only) 

se4 <- sqrt(diag(vcovBK(mod4, cluster = "time")))

coeftest(mod4, vcovBK(mod4, cluster = "time"))

(0.363952 - 2*0.030857)*median(p_only$rev_adj)
(0.363952 + 2*0.030857)*median(p_only$rev_adj)

quantile(p_only$rev_adj, 0.1)
NoLow <- subset(p_only, rev_adj > quantile(p_only$rev_adj, 0.1))

mod5 <- plm(di2 ~ treated + treated_period + treated_within, #+ 
              #di_lag*time + 
              #log(n_contracts)*time + log(active_lobbyists)*time , 
            model = "pooling", effect = "twoway",
            data = NoLow) 
se5 <- sqrt(diag(vcovBK(mod5, cluster = "time")))

coeftest(mod5, vcovBK(mod5, cluster = "time"))



p_only$placebo_period <- ifelse(p_only$time == "2016_1", 1, 0)

mod6 <- plm(di2 ~ treated*placebo_period, #+ 
            #di_lag*time + 
            #log(n_contracts)*time + log(active_lobbyists)*time , 
            model = "pooling", effect = "twoway",
            data = p_only) 
se6 <- sqrt(diag(vcovBK(mod6, cluster = "time")))

coeftest(mod6, vcovBK(mod6, cluster = "time"))


stargazer(mod1, mod2, mod3, mod4, mod5, mod6,
          se = list(se1, se2, se3, se4, se5, se6),
          keep = "treat",
           covariate.labels = c("Treated", "Treated Period",
                                "Treated X Treated Period", "Treated X Placebo Period"),
          add.lines = list(c("Controls?", "No", "Yes", "Yes", "Yes", "No", "No"),
                           c("Time FE X Controls?", "No", "No", "Yes", "Yes", "No", "No"),
                           c("Time FE X Revenue t-1?", "No", "No", "Yes", "Yes", "No", "No"),
                           c("Firm FE?", "No", "No", "No", "Yes", "No", "No"),
                           c("Time FE?", "No", "No", "No", "Yes", "No", "No")),
          omit.stat = c("f", "rsq", "adj.rsq"), df = F,
          dep.var.labels = c("Change in ln Revenue"),
          title = "Lobbyist Appointment to the Trump Administration and Firm Revenue",
          label = "main",
          out = c("../tables/TableE17.tex")
)

p_only$dd_term <- p_only$treated*p_only$treated_period

don_est <- interflex(estimator = "binning", data = as.data.frame(p_only),
          Y = "di2", D = "dd_term", X = "net_prop", na.rm=T,
          FE = c("employer.id", "time"))

p<-ggplot(as.data.frame(don_est$est.bin$`1`), aes(x = x0, y = coef)) +
  geom_point()+
  geom_errorbar(aes(ymin = CI.lower, ymax = CI.upper, width = 0)) +
  geom_hline(yintercept = 0, lty = 2)+
  theme_classic() +
  labs(x = "Net Proportion of Donations to Republicans",
       y = "Difference-in-Differences Estimate\nof Connection to Trump Admin")+
  scale_x_continuous(limits = c(-1,1),
                     breaks = c(-1, -0.5, 0, 0.5, 1),
                     labels = c("-1\nAll D", "-0.5", "0\nNo Donations", "0.5",
                                "1\nAll R"))
ggsave(filename = "../images/FigureE11.pdf",
       width = 6, height = 5)

###########################################################
# Descriptive statistics for the appendix.


## !! NOTE: these two .tex files must be manually combined to form the Table E16 in the manuscript
desc_con <- select(only_treat, rev_adj, total_firm_donations, prop_rep,
                   active_lobbyists, n_contracts)
stargazer(as.data.frame(desc_con),
          covariate.labels = c("Revenue", "Total Donations",
                               "Prop. Donations to R", "Active Lobbyists",
                               "Number of Contracts"), 
          summary.stat = c("n", "mean", "sd", "min", "max"),
          title = "Descriptive Statistics (Trump Case Study)",
          label = "tab:desc",
          out = "../tables/TableE16_part1.tex")

desc <- select(FirmPanel, rev_adj, total_firm_donations, prop_rep,
                   active_lobbyists, n_contracts)
stargazer(as.data.frame(desc),
          covariate.labels = c("Revenue", "Total Donations",
                               "Prop. Donations to R", "Active Lobbyists",
                               "Number of Contracts"),
          summary.stat = c("n", "mean", "sd", "min", "max"),
          title = "Descriptive Statistics (Trump Case Study)",
          label = "tab:desc",
          out = "../tables/TableE16_part2.tex")

